home *** CD-ROM | disk | FTP | other *** search
- /*
- * boolean_op.c --
- * SCCS Status : %W% %G%
- * Author : Huynh Quoc T. Tung
- * Created On : Sun Oct 17 16:40:46 1993
- * Last Modified By: Huynh Quoc T. Tung
- * Last Modified On: Tue Oct 19 08:36:47 1993
- * Update Count : 2
- * Status : Unknown, Use with caution!
- */
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "irfiles.h"
#include "boolean_op.h"
-
- struct node
- {
- long key;
- struct node *next;
- };
- struct node *head, *z, *t;
-
- extern long number_of_qwords;
- extern double *document_score_array;
- extern search_result_struct* search_result_array;
-
- search_result_struct *end_result = NULL;
-
- boolean IsOperator(op)
- char *op;
- {
- if(!strcmp(op,"and") ||
- !strcmp(op,"or") ||
- !strcmp(op,"not"))
- return(true);
- else return(false);
- }
-
- void stackinit()
- {
- head = (struct node *) malloc(sizeof (* head));
- z = (struct node *) malloc(sizeof(* z));
- head->next = z;
- head->key = 0;
- z->next = z;
- }
-
- void push(v)
- long v;
- {
- t = (struct node *) malloc(sizeof(* t));
- t->key = v;
- t->next = head->next;
- head->next = t;
- }
-
- long pop()
- {
- long x;
-
- t = head->next;
- head->next = t->next;
- x = t->key;
- free(t);
- return x;
- }
-
- int stackempty()
- {
- return head->next == z;
- }
-
- void Union(result, set1, set2)
- search_result_struct *result;
- search_result_struct *set1;
- search_result_struct *set2;
- {
- while((set1->number_of_hits != 0) &&
- (set2->number_of_hits != 0)) {
- if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
- result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
- result->doc_ids_array->score = set1->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array ;
- --set1->number_of_hits;
- ++set1->doc_ids_array;
- }
- else if(set1->doc_ids_array->doc_id > set2->doc_ids_array->doc_id) {
- result->doc_ids_array->doc_id = set2->doc_ids_array->doc_id;
- result->doc_ids_array->score = set2->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array ;
- --set2->number_of_hits;
- ++set2->doc_ids_array;
- }
- else { /* doc_id1 == doc_id2 { */
- result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
- result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array ;
- --set1->number_of_hits; --set2->number_of_hits;
- ++set1->doc_ids_array; ++set2->doc_ids_array;
- }
- }
- if((set1->number_of_hits == 0) &&
- (set2->number_of_hits != 0)) {
- memcpy((char *)result->doc_ids_array,
- set2->doc_ids_array,
- set2->number_of_hits * sizeof(doc_descr_struct));
- set2->doc_ids_array += set2->number_of_hits;
- result->doc_ids_array += set2->number_of_hits;
- result->number_of_hits += set2->number_of_hits;
- set2->number_of_hits = 0;
- }
- else if((set1->number_of_hits != 0) &&
- (set2->number_of_hits == 0)) {
- memcpy((char *)result->doc_ids_array,
- set1->doc_ids_array,
- set1->number_of_hits * sizeof(doc_descr_struct));
- set1->doc_ids_array += set1->number_of_hits;
- result->doc_ids_array += set1->number_of_hits;
- result->number_of_hits += set1->number_of_hits;
- set1->number_of_hits = 0;
- }
- }
-
- void Or_Operator(operand1, operand2)
- long operand1;
- long operand2;
- {
- search_result_struct *op1, *op2;
- long doc_score_size = sizeof(search_result_struct);
- long doc_ids_array_size = sizeof(doc_descr_struct);
- long op1_number_of_hits, op2_number_of_hits;
-
- op1 = &search_result_array[operand1];
- op2 = &search_result_array[operand2];
- op1_number_of_hits = op1->number_of_hits;
- op2_number_of_hits = op2->number_of_hits;
-
- if(op1->number_of_hits == 0) {
- if(op1->doc_ids_array != NULL)
- s_free(op1->doc_ids_array);
- if(op2->number_of_hits > 0) {
- end_result->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(end_result->doc_ids_array,
- op2->number_of_hits * doc_ids_array_size);
- memcpy((char *)end_result->doc_ids_array,
- (char *)op2->doc_ids_array,
- doc_ids_array_size * op2->number_of_hits);
- }
- end_result->number_of_hits = op2->number_of_hits;
- push(op2->word_id);
- }
- else if(op2->number_of_hits == 0) {
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- if(op1->number_of_hits > 0) {
- end_result->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(end_result->doc_ids_array,
- op1->number_of_hits * doc_ids_array_size);
- memcpy((char *)end_result->doc_ids_array,
- (char *)op1->doc_ids_array,
- doc_ids_array_size * op1->number_of_hits);
- }
- end_result->number_of_hits = op1->number_of_hits;
- push(op1->word_id);
- }
- else if((op1->number_of_hits != 0) &&
- (op2->number_of_hits != 0)) {
- end_result->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(end_result->doc_ids_array,
- (op1->number_of_hits + op2->number_of_hits) *
- doc_ids_array_size);
- end_result->number_of_hits = 0;
-
- Union(end_result, op1, op2);
-
- op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
- op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
- end_result->doc_ids_array -= end_result->number_of_hits;
- op2->number_of_hits = end_result->number_of_hits;
- s_free(op1->doc_ids_array);
- if(end_result->number_of_hits > 0) {
- op2->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(op2->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- memcpy((char *)op2->doc_ids_array,
- (char *)end_result->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- }
- else {
- op2->number_of_hits = end_result->number_of_hits;
- s_free(op2->doc_ids_array);
- }
- push(op2->word_id);
- }
- }
-
- void Intersection(result, set1, set2)
- search_result_struct * result;
- search_result_struct * set1;
- search_result_struct * set2;
- {
- while((set1->number_of_hits != 0) &&
- (set2->number_of_hits != 0)) {
- if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
- result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
- result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array ;
- --set1->number_of_hits; --set2->number_of_hits;
- ++set1->doc_ids_array ; ++set2->doc_ids_array ;
- }
- else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
- --set1->number_of_hits;
- ++set1->doc_ids_array ;
- }
- else /* doc_id1 > doc_id2 */
- {
- --set2->number_of_hits;
- ++set2->doc_ids_array ;
- }
- }
- }
-
- void And_Operator(operand1, operand2)
- long operand1;
- long operand2;
- {
- search_result_struct *op1, *op2;
- long doc_score_size = sizeof(search_result_struct);
- long doc_ids_array_size = sizeof(doc_descr_struct);
- long op1_number_of_hits, op2_number_of_hits;
-
- op1 = &search_result_array[operand1];
- op2 = &search_result_array[operand2];
- op1_number_of_hits = op1->number_of_hits;
- op2_number_of_hits = op2->number_of_hits;
-
- if(op1->number_of_hits == 0) {
- end_result->number_of_hits = 0;
- if(op1->doc_ids_array != NULL)
- s_free(op1->doc_ids_array);
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- push(op1->word_id);
- }
- else if(op2->number_of_hits == 0) {
- end_result->number_of_hits = 0;
- if(op1->doc_ids_array != NULL)
- s_free(op1->doc_ids_array);
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- push(op2->word_id);
- }
- else if((op1->number_of_hits != 0) &&
- (op2->number_of_hits != 0)) {
- if(op1->number_of_hits > op2->number_of_hits)
- end_result->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(end_result->doc_ids_array,
- op2->number_of_hits * doc_ids_array_size);
- else
- end_result->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(end_result->doc_ids_array,
- op1->number_of_hits * doc_ids_array_size);
- end_result->number_of_hits = 0;
-
- Intersection(end_result, op1, op2);
-
- op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
- op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
- end_result->doc_ids_array -= end_result->number_of_hits;
- op2->number_of_hits = end_result->number_of_hits;
- s_free(op1->doc_ids_array);
- if(end_result->number_of_hits > 0) {
- op2->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(op2->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- memcpy((char *)op2->doc_ids_array,
- (char *)end_result->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- }
- else {
- op2->number_of_hits = end_result->number_of_hits;
- s_free(op2->doc_ids_array);
- }
- push(op2->word_id);
- }
- }
-
- void Difference(result, set1, set2)
- search_result_struct *result;
- search_result_struct *set1;
- search_result_struct *set2;
- {
- while((set1->number_of_hits != 0) &&
- (set2->number_of_hits != 0)) {
- if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
- --set1->number_of_hits; --set2->number_of_hits;
- ++set1->doc_ids_array; ++set2->doc_ids_array;
- }
- else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
- result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
- result->doc_ids_array->score = set1->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array;
- --set1->number_of_hits;
- ++set1->doc_ids_array;
- }
- else /* doc_id1 > doc_id2 */ {
- --set2->number_of_hits;
- ++set2->doc_ids_array;
- }
- }
- if((set1->number_of_hits != 0) &&
- (set2->number_of_hits == 0)) {
- memcpy((char *)result->doc_ids_array,
- set1->doc_ids_array,
- set1->number_of_hits * sizeof(doc_descr_struct));
- set1->doc_ids_array += set1->number_of_hits;
- result->doc_ids_array += set1->number_of_hits;
- result->number_of_hits += set1->number_of_hits;
- set1->number_of_hits = 0;
- }
- }
-
- void Not_Operator( operand1, operand2)
- long operand1;
- long operand2;
- {
- search_result_struct *op1, *op2;
- long doc_score_size = sizeof(search_result_struct);
- long doc_ids_array_size = sizeof(doc_descr_struct);
- long op1_number_of_hits, op2_number_of_hits;
-
- op1 = &search_result_array[operand1];
- op2 = &search_result_array[operand2];
- op1_number_of_hits = op1->number_of_hits;
- op2_number_of_hits = op2->number_of_hits;
-
- if(op1->number_of_hits == 0) {
- end_result->number_of_hits = 0;
- if(op1->doc_ids_array != NULL)
- s_free(op1->doc_ids_array);
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- push(op1->word_id);
- }
- else if(op2->number_of_hits == 0) {
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- if(op1->number_of_hits > 0) {
- end_result->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(end_result->doc_ids_array,
- op1->number_of_hits * doc_ids_array_size);
- memcpy((char *)end_result->doc_ids_array,
- (char *)op1->doc_ids_array,
- doc_ids_array_size * op1->number_of_hits + 1);
- }
- end_result->number_of_hits = op1->number_of_hits;
- push(op1->word_id);
- }
- else if((op1->number_of_hits != 0) &&
- (op2->number_of_hits != 0)) {
- end_result->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(end_result->doc_ids_array,
- (op1->number_of_hits + op2->number_of_hits) *
- doc_ids_array_size);
- end_result->number_of_hits = 0;
-
- Difference(end_result, op1, op2);
-
- op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
- op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
- end_result->doc_ids_array -= end_result->number_of_hits;
- op1->number_of_hits = end_result->number_of_hits;
- s_free(op2->doc_ids_array);
- if(end_result->number_of_hits > 0) {
- op1->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(op1->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- memcpy((char *)op1->doc_ids_array,
- (char *)end_result->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- }
- else {
- op1->number_of_hits = end_result->number_of_hits;
- s_free(op2->doc_ids_array);
- }
- push(op1->word_id);
- }
- }
-
- boolean stackinitialized = false;
-
- boolean boolean_operations(operator)
- char *operator;
- {
- long word_id1, word_id2;
- #ifndef WIN32
- int i;
- #endif
-
- word_id1 = word_id2 = -1L;
-
- if (!stackinitialized) {
- stackinit();
- stackinitialized = true;
- }
- if (!strcmp(operator, "or")) {
- if(stackempty())
- word_id1 = -1L;
- else word_id1 = pop();
- if(stackempty())
- word_id2 = -1L;
- else word_id2 = pop();
- if((word_id1 == -1) || (word_id2 == -1))
- return(false);
- /*waislog(WLOG_HIGH, WLOG_ERROR,
- "boolean search failed, too few operands.\n");*/
- else Or_Operator(word_id1, word_id2);
- }
- else if (!strcmp(operator, "and")) {
- if(stackempty())
- word_id1 = -1L;
- else word_id1 = pop();
- if(stackempty())
- word_id2 = -1L;
- else word_id2 = pop();
- if((word_id1 == -1) || (word_id2 == -1))
- return(false);
- /*waislog(WLOG_HIGH, WLOG_ERROR,
- "boolean search failed, too few operands.\n");*/
- else And_Operator(word_id1, word_id2);
- }
- else if (!strcmp(operator, "not")) {
- if(stackempty())
- word_id1 = -1L;
- else word_id1 = pop();
- if(stackempty())
- word_id2 = -1L;
- else word_id2 = pop();
- if((word_id1 == -1) || (word_id2 == -1))
- return(false);
- /*waislog(WLOG_HIGH, WLOG_ERROR,
- "boolean search failed, too few operands.\n");*/
- else Not_Operator(word_id2, word_id1);
- }
- }
-
- void save_word_id(word_id)
- long word_id;
- {
- if (!stackinitialized) {
- stackinit();
- stackinitialized = true;
- }
- if(end_result == NULL)
- end_result =
- (search_result_struct *)s_malloc(sizeof(search_result_struct));
- if(end_result->doc_ids_array == NULL)
- end_result->doc_ids_array =
- (doc_descr_struct *)
- s_malloc((search_result_array[word_id].number_of_hits + 1) *
- sizeof(doc_descr_struct));
- else end_result->doc_ids_array =
- (doc_descr_struct *)
- s_realloc(end_result->doc_ids_array,
- (search_result_array[word_id].number_of_hits + 1) *
- sizeof(doc_descr_struct));
- end_result->number_of_hits = search_result_array[word_id].number_of_hits;
- if(search_result_array[word_id].doc_ids_array != NULL)
- memcpy((char*)end_result->doc_ids_array,
- (char*)search_result_array[word_id].doc_ids_array,
- search_result_array[word_id].number_of_hits *
- sizeof(doc_descr_struct));
- push(word_id);
- }
-
- long retriev_result(entries)
- long entries;
- {
- int doc_id, count;
- long number_of_hits = 0;
-
- if((end_result != NULL) && (document_score_array != NULL))
- for(count=0; count < end_result->number_of_hits; count++) {
- doc_id = end_result->doc_ids_array[count].doc_id;
- document_score_array[doc_id] = end_result->doc_ids_array[count].score;
- ++number_of_hits;
- }
- number_of_qwords = 0;
- stackinitialized = false;
- if(end_result != NULL) {
- if(end_result->doc_ids_array != NULL)
- s_free(end_result->doc_ids_array);
- s_free(end_result);
- }
- if(search_result_array != NULL)
- s_free(search_result_array);
- return(number_of_hits);
- }
-